Morphological variation of amazon and sailfin mollies {.tabset .tabset-fade .tabset-pills}

In this study, I am looking at various populations of amazon and sailfin mollies across their native/introduced range to assess morphological variation both within and among the species. I am using the Pickle fish collections for my samples.

Initial steps

Data collection

## Using libcurl 8.3.0 with Schannel

Checking normality

I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.

Conclusions: literally all of them are NOT normal… will log transform them and run parametric tests (t-test, F-test, ANOVA). Not sure what to do with the discrete variables…

SW Test

shapiro.test(raw1$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SL
## W = 0.9763, p-value = 2.763e-05
shapiro.test(raw1$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$BD
## W = 0.96287, p-value = 1.787e-07
shapiro.test(raw1$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPD
## W = 0.96431, p-value = 2.908e-07
shapiro.test(raw1$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPL
## W = 0.97288, p-value = 6.831e-06
shapiro.test(raw1$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$PreDL
## W = 0.97924, p-value = 9.997e-05
shapiro.test(raw1$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$DbL
## W = 0.97697, p-value = 3.68e-05
shapiro.test(raw1$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HL
## W = 0.94955, p-value = 3.057e-09
shapiro.test(raw1$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HD
## W = 0.96761, p-value = 9.28e-07
shapiro.test(raw1$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HW
## W = 0.97201, p-value = 4.833e-06
shapiro.test(raw1$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SnL
## W = 0.65042, p-value < 2.2e-16
shapiro.test(raw1$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$OL
## W = 0.98631, p-value = 0.003102

Histograms

hist(raw1$SL)

hist(raw1$BD)

hist(raw1$CPD)

hist(raw1$CPL)

hist(raw1$PreDL)

hist(raw1$DbL)

hist(raw1$HL)

hist(raw1$HD)

hist(raw1$HW)

hist(raw1$SnL)

hist(raw1$OL)

QQ plots

qqnorm(raw1$SL)
qqline(raw1$SL)

qqnorm(raw1$BD)
qqline(raw1$BD)

qqnorm(raw1$CPD)
qqline(raw1$CPD)

qqnorm(raw1$CPL)
qqline(raw1$CPL)

qqnorm(raw1$PreDL)
qqline(raw1$PreDL)

qqnorm(raw1$DbL)
qqline(raw1$DbL)

qqnorm(raw1$HL)
qqline(raw1$HL)

qqnorm(raw1$HD)
qqline(raw1$HD)

qqnorm(raw1$HW)
qqline(raw1$HW)

qqnorm(raw1$SnL)
qqline(raw1$SnL)

qqnorm(raw1$OL)
qqline(raw1$OL)

Log transformations

Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.

raw1[paste0(names(raw1), '_log')] <- log(raw1[, 26:35], 10)
#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the uncessary columns instead of spending time on what went wrong.

raw1 <- raw1[-c(37:60)]
raw1 <- raw1[-c(48)]

#double checking normality with a SW test and QQ plot

shapiro.test(raw1$SL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SL_log
## W = 0.99271, p-value = 0.1056
shapiro.test(raw1$BD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$BD_log
## W = 0.82598, p-value < 2.2e-16
shapiro.test(raw1$CPD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPD_log
## W = 0.99285, p-value = 0.1144
shapiro.test(raw1$CPL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPL_log
## W = 0.99513, p-value = 0.3824
shapiro.test(raw1$PreDL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$PreDL_log
## W = 0.93554, p-value = 8.204e-11
shapiro.test(raw1$DbL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$DbL_log
## W = 0.98042, p-value = 0.0001711
shapiro.test(raw1$HL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HL_log
## W = 0.99386, p-value = 0.1984
shapiro.test(raw1$HD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HD_log
## W = 0.99661, p-value = 0.7102
shapiro.test(raw1$HW_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HW_log
## W = 0.99491, p-value = 0.3435
shapiro.test(raw1$SnL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SnL_log
## W = 0.99366, p-value = 0.1788
shapiro.test(raw1$OL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$OL_log
## W = 0.99271, p-value = 0.1056
qqnorm(raw1$SL_log)
qqline(raw1$SL_log)

qqnorm(raw1$BD_log)
qqline(raw1$BD_log)

qqnorm(raw1$CPD_log)
qqline(raw1$CPD_log)

qqnorm(raw1$CPL_log)
qqline(raw1$CPL_log)

qqnorm(raw1$PreDL_log)
qqline(raw1$PreDL_log)

qqnorm(raw1$DbL_log)
qqline(raw1$DbL_log)

qqnorm(raw1$HL_log)
qqline(raw1$HL_log)

qqnorm(raw1$HD_log)
qqline(raw1$HD_log)

qqnorm(raw1$HW_log)
qqline(raw1$HW_log)

qqnorm(raw1$SnL_log)
qqline(raw1$SnL_log)

qqnorm(raw1$OL_log)
qqline(raw1$OL_log)

Correcting for body size (residuals)

Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.

Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales below lateral line and fluctuating asymmetry; all other traits influenced by body size.

Regressions

  • STEP ONE: plot trait vs body size
library(ggplot2)
library(ggpubr)



lat <- raw1[raw1$SPP == "p.latipinna",]


form <- raw1[raw1$SPP == "p.formosa",]


mex <- raw1[raw1$SPP == "p.mexicana",]



##### LAT #####
reg.lat.D <- lm(lat$D ~ lat$SL)
sd.lat.D <- rstandard(reg.lat.D)
reg.lat.D.plot <- ggplot(lat, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P1 <- lm(lat$P1 ~ lat$SL)
sd.lat.P1 <- rstandard(reg.lat.P1)
reg.lat.P1.plot <- ggplot(lat, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P2.L <- lm(lat$P2.L ~ lat$SL)
sd.lat.P2.L <- rstandard(reg.lat.P2.L)
reg.lat.P2.L.plot <- ggplot(lat, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P2.R <- lm(lat$P2.R ~ lat$SL)
sd.lat.P2.R <- rstandard(reg.lat.P2.R)
reg.lat.P2.R.plot <- ggplot(lat, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.A <- lm(lat$A ~ lat$SL)
sd.lat.A <- rstandard(reg.lat.A)
reg.lat.A.plot <- ggplot(lat, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P1.R <- lm(lat$P1.R ~ lat$SL)
sd.lat.P1.R <- rstandard(reg.lat.P1.R)
reg.lat.P1.R.plot <- ggplot(lat, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.LLSC <- lm(lat$LLSC ~ lat$SL)
sd.lat.LLSC <- rstandard(reg.lat.LLSC)
reg.lat.LLSC.plot <- ggplot(lat, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SALL <- lm(lat$SALL ~ lat$SL)
sd.lat.SALL <- rstandard(reg.lat.SALL)
reg.lat.SALL.plot <- ggplot(lat, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SBLL <- lm(lat$SBLL ~ lat$SL)
sd.lat.SBLL <- rstandard(reg.lat.SBLL)
reg.lat.SBLL.plot <- ggplot(lat, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SBDF <- lm(lat$SBDF ~ lat$SL)
sd.lat.SBDF <- rstandard(reg.lat.SBDF)
reg.lat.SBDF.plot <- ggplot(lat, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.BD <- lm(lat$BD_log ~ lat$SL)
sd.lat.BD <- rstandard(reg.lat.BD)
reg.lat.BD.plot <- ggplot(lat, aes(x = SL, y = BD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.CPD <- lm(lat$CPD_log ~ lat$SL)
sd.lat.CPD <- rstandard(reg.lat.CPD)
reg.lat.CPD.plot <- ggplot(lat, aes(x = SL, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.CPL <- lm(lat$CPL_log ~ lat$SL)
sd.lat.CPL <- rstandard(reg.lat.CPL)
reg.lat.CPL.plot <- ggplot(lat, aes(x = SL, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.PreDL <- lm(lat$PreDL_log ~ lat$SL)
sd.lat.PreDL <- rstandard(reg.lat.PreDL)
reg.lat.PreDL.plot <- ggplot(lat, aes(x = SL, y = PreDL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.DbL <- lm(lat$DbL_log ~ lat$SL)
sd.lat.DbL <- rstandard(reg.lat.DbL)
reg.lat.DbL.plot <- ggplot(lat, aes(x = SL, y = DbL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HL <- lm(lat$HL_log ~ lat$SL)
sd.lat.HL <- rstandard(reg.lat.HL)
reg.lat.HL.plot <- ggplot(lat, aes(x = SL, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HD <- lm(lat$HD_log ~ lat$SL)
sd.lat.HD <- rstandard(reg.lat.HD)
reg.lat.HD.plot <- ggplot(lat, aes(x = SL, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HW <- lm(lat$HW_log ~ lat$SL)
sd.lat.HW <- rstandard(reg.lat.HW)
reg.lat.HW.plot <- ggplot(lat, aes(x = SL, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SnL <- lm(lat$SnL_log ~ lat$SL)
sd.lat.SnL <- rstandard(reg.lat.SnL)
reg.lat.SnL.plot <- ggplot(lat, aes(x = SL, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.OL <- lm(lat$OL_log ~ lat$SL)
sd.lat.OL <- rstandard(reg.lat.OL)
reg.lat.OL.plot <- ggplot(lat, aes(x = SL, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.FLA <- lm(lat$FLA ~ lat$SL)
sd.lat.FLA <- rstandard(reg.lat.FLA)
reg.lat.FLA.plot <- ggplot(lat, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### FORM #####

reg.form.D <- lm(form$D ~ form$SL)
sd.form.D <- rstandard(reg.form.D)
reg.form.D.plot <- ggplot(form, aes(x =SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P1 <- lm(form$P1 ~ form$SL)
sd.form.P1 <- rstandard(reg.form.P1)
reg.form.P1.plot <- ggplot(form, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P2.L <- lm(form$P2.L ~ form$SL)
sd.form.P2.L <- rstandard(reg.form.P2.L)
reg.form.P2.L.plot <- ggplot(form, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P2.R <- lm(form$P2.R ~ form$SL)
sd.form.P2.R <- rstandard(reg.form.P2.R)
reg.form.P2.R.plot <- ggplot(form, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.A <- lm(form$A ~ form$SL)
sd.form.A <- rstandard(reg.form.A)
reg.form.A.plot <- ggplot(form, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P1.R <- lm(form$P1.R ~ form$SL)
sd.form.P1.R <- rstandard(reg.form.P1.R)
reg.form.P1.R.plot <- ggplot(form, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.LLSC <- lm(form$LLSC ~ form$SL)
sd.form.LLSC <- rstandard(reg.form.LLSC)
reg.form.LLSC.plot <- ggplot(form, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SALL <- lm(form$SALL ~ form$SL)
sd.form.SALL <- rstandard(reg.form.SALL)
reg.form.SALL.plot <- ggplot(form, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SBLL <- lm(form$SBLL ~ form$SL)
sd.form.SBLL <- rstandard(reg.form.SBLL)
reg.form.SBLL.plot <- ggplot(form, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SBDF <- lm(form$SBDF ~ form$SL)
sd.form.SBDF <- rstandard(reg.form.SBDF)
reg.form.SBDF.plot <- ggplot(form, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.BD <- lm(form$BD_log ~ form$SL)
sd.form.BD <- rstandard(reg.form.BD)
reg.form.BD.plot <- ggplot(form, aes(x = SL, y = BD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.CPD <- lm(form$CPD_log ~ form$SL)
sd.form.CPD <- rstandard(reg.form.CPD)
reg.form.CPD.plot <- ggplot(form, aes(x = SL, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.CPL <- lm(form$CPL_log ~ form$SL)
sd.form.CPL <- rstandard(reg.form.CPL)
reg.form.CPL.plot <- ggplot(form, aes(x = SL, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.PreDL <- lm(form$PreDL_log ~ form$SL)
sd.form.PreDL <- rstandard(reg.form.PreDL)
reg.form.PreDL.plot <- ggplot(form, aes(x = SL, y = PreDL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.DbL <- lm(form$DbL_log ~ form$SL)
sd.form.DbL <- rstandard(reg.form.DbL)
reg.form.DbL.plot <- ggplot(form, aes(x = SL, y = DbL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HL <- lm(form$HL_log ~ form$SL)
sd.form.HL <- rstandard(reg.form.HL)
reg.form.HL.plot <- ggplot(form, aes(x = SL, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HD <- lm(form$HD_log ~ form$SL)
sd.form.HD <- rstandard(reg.form.HD)
reg.form.HD.plot <- ggplot(form, aes(x = SL, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HW <- lm(form$HW_log ~ form$SL)
sd.form.HW <- rstandard(reg.form.HW)
reg.form.HW.plot <- ggplot(form, aes(x = SL, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SnL <- lm(form$SnL_log ~ form$SL)
sd.form.SnL <- rstandard(reg.form.SnL)
reg.form.SnL.plot <- ggplot(form, aes(x = SL, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.OL <- lm(form$OL_log ~ form$SL)
sd.form.OL <- rstandard(reg.form.OL)
reg.form.OL.plot <- ggplot(form, aes(x = SL, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.FLA <- lm(form$FLA ~ form$SL)
sd.form.FLA <- rstandard(reg.form.FLA)
reg.form.FLA.plot <- ggplot(form, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### MEX #####
reg.mex.D <- lm(mex$D ~ mex$SL)
sd.mex.D <- rstandard(reg.mex.D)
reg.mex.D.plot <- ggplot(mex, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P1 <- lm(mex$P1 ~ mex$SL)
sd.mex.P1 <- rstandard(reg.mex.P1)
reg.mex.P1.plot <- ggplot(mex, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P2.L <- lm(mex$P2.L ~ mex$SL)
sd.mex.P2.L <- rstandard(reg.mex.P2.L)
reg.mex.P2.L.plot <- ggplot(mex, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P2.R <- lm(mex$P2.R ~ mex$SL)
sd.mex.P2.R <- rstandard(reg.mex.P2.R)
reg.mex.P2.R.plot <- ggplot(mex, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.A <- lm(mex$A ~ mex$SL)
sd.mex.A <- rstandard(reg.mex.A)
reg.mex.A.plot <- ggplot(mex, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P1.R <- lm(mex$P1.R ~ mex$SL)
sd.mex.P1.R <- rstandard(reg.mex.P1.R)
reg.mex.P1.R.plot <- ggplot(mex, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.LLSC <- lm(mex$LLSC ~ mex$SL)
sd.mex.LLSC <- rstandard(reg.mex.LLSC)
reg.mex.LLSC.plot <- ggplot(mex, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SALL <- lm(mex$SALL ~ mex$SL)
sd.mex.SALL <- rstandard(reg.mex.SALL)
reg.mex.SALL.plot <- ggplot(mex, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SBLL <- lm(mex$SBLL ~ mex$SL)
sd.mex.SBLL <- rstandard(reg.mex.SBLL)
reg.mex.SBLL.plot <- ggplot(mex, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SBDF <- lm(mex$SBDF ~ mex$SL)
sd.mex.SBDF <- rstandard(reg.mex.SBDF)
reg.mex.SBDF.plot <- ggplot(mex, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.BD <- lm(mex$BD_log ~ mex$SL)
sd.mex.BD <- rstandard(reg.mex.BD)
reg.mex.BD.plot <- ggplot(mex, aes(x = SL, y = BD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.CPD <- lm(mex$CPD_log ~ mex$SL)
sd.mex.CPD <- rstandard(reg.mex.CPD)
reg.mex.CPD.plot <- ggplot(mex, aes(x = SL, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.CPL <- lm(mex$CPL_log ~ mex$SL)
sd.mex.CPL <- rstandard(reg.mex.CPL)
reg.mex.CPL.plot <- ggplot(mex, aes(x = SL, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.PreDL <- lm(mex$PreDL_log ~ mex$SL)
sd.mex.PreDL <- rstandard(reg.mex.PreDL)
reg.mex.PreDL.plot <- ggplot(mex, aes(x = SL, y = PreDL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.DbL <- lm(mex$DbL_log ~ mex$SL)
sd.mex.DbL <- rstandard(reg.mex.DbL)
reg.mex.DbL.plot <- ggplot(mex, aes(x = SL, y = DbL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HL <- lm(mex$HL_log ~ mex$SL)
sd.mex.HL <- rstandard(reg.mex.HL)
reg.mex.HL.plot <- ggplot(mex, aes(x = SL, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HD <- lm(mex$HD_log ~ mex$SL)
sd.mex.HD <- rstandard(reg.mex.HD)
reg.mex.HD.plot <- ggplot(mex, aes(x = SL, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HW <- lm(mex$HW_log ~ mex$SL)
sd.mex.HW <- rstandard(reg.mex.HW)
reg.mex.HW.plot <- ggplot(mex, aes(x = SL, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SnL <- lm(mex$SnL_log ~ mex$SL)
sd.mex.SnL <- rstandard(reg.mex.SnL)
reg.mex.SnL.plot <- ggplot(mex, aes(x = SL, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.OL <- lm(mex$OL_log ~ mex$SL)
sd.mex.OL <- rstandard(reg.mex.OL)
reg.mex.OL.plot <- ggplot(mex, aes(x = SL, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.FLA <- lm(mex$FLA ~ mex$SL)
sd.mex.FLA <- rstandard(reg.mex.FLA)
reg.mex.FLA.plot <- ggplot(mex, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

Residuals

  • STEP TWO: get residuals for each individual for traits that were influenced by body size

  • STEP THREE: convert residuals to absolute value

##### LAT #####

abs.lat.D <- abs(res.lat.D)
mean(abs.lat.D)
## [1] 0.5375505
abs.lat.P1 <- abs(res.lat.P1)
mean(abs.lat.P1)
## [1] 0.5466667
abs.lat.P1.R <- abs(res.lat.P1.R)
mean(abs.lat.P1.R)
## [1] 0.5799393
abs.lat.LLSC <- abs(res.lat.LLSC)
mean(abs.lat.LLSC)
## [1] 0.7664044
abs.lat.SBLL <- abs(res.lat.SBLL)
mean(abs.lat.SBLL)
## [1] 0.2461336
abs.lat.BD <- abs(res.lat.BD)
mean(abs.lat.BD)
## [1] 0.02728166
abs.lat.CPD <- abs(res.lat.CPD)
mean(abs.lat.CPD)
## [1] 0.02321306
abs.lat.CPL <- abs(res.lat.CPL)
mean(abs.lat.CPL)
## [1] 0.02482066
abs.lat.PreDL <- abs(res.lat.PreDL)
mean(abs.lat.PreDL)
## [1] 0.0452001
abs.lat.DbL <- abs(res.lat.DbL)
mean(abs.lat.DbL)
## [1] 0.03444377
abs.lat.HL <- abs(res.lat.HL)
mean(abs.lat.HL)
## [1] 0.02817285
abs.lat.HD <- abs(res.lat.HD)
mean(abs.lat.HD)
## [1] 0.02584031
abs.lat.HW <- abs(res.lat.HW)
mean(abs.lat.HW)
## [1] 0.02131838
abs.lat.SnL <- abs(res.lat.SnL)
mean(abs.lat.SnL)
## [1] 0.01696166
abs.lat.OL <- abs(res.lat.OL)
mean(abs.lat.OL)
## [1] 0.03227227
##### FORM #####

abs.form.D <- abs(res.form.D)
mean(abs.form.D)
## [1] 0.5668177
abs.form.P1 <- abs(res.form.P1)
mean(abs.form.P1)
## [1] 0.4843616
abs.form.P1.R <- abs(res.form.P1.R)
mean(abs.form.P1.R)
## [1] 0.4242033
abs.form.LLSC <- abs(res.form.LLSC)
mean(abs.form.LLSC)
## [1] 0.9038801
abs.form.SBLL <- abs(res.form.SBLL)
mean(abs.form.SBLL)
## [1] 0.3399272
abs.form.BD <- abs(res.form.BD)
mean(abs.form.BD)
## [1] 0.0398012
abs.form.CPD <- abs(res.form.CPD)
mean(abs.form.CPD)
## [1] 0.02256173
abs.form.CPL <- abs(res.form.CPL)
mean(abs.form.CPL)
## [1] 0.02460215
abs.form.PreDL <- abs(res.form.PreDL)
mean(abs.form.PreDL)
## [1] 0.04187967
abs.form.DbL <- abs(res.form.DbL)
mean(abs.form.DbL)
## [1] 0.03106601
abs.form.HL <- abs(res.form.HL)
mean(abs.form.HL)
## [1] 0.02847209
abs.form.HD <- abs(res.form.HD)
mean(abs.form.HD)
## [1] 0.02490519
abs.form.HW <- abs(res.form.HW)
mean(abs.form.HW)
## [1] 0.02254712
abs.form.SnL <- abs(res.form.SnL)
mean(abs.form.SnL)
## [1] 0.01561874
abs.form.OL <- abs(res.form.OL)
mean(abs.form.OL)
## [1] 0.03648207
##### MEX #####

abs.mex.D <- abs(res.mex.D)
mean(abs.mex.D)
## [1] 0.1657275
abs.mex.P1 <- abs(res.mex.P1)
mean(abs.mex.P1)
## [1] 0.5686425
abs.mex.P1.R <- abs(res.mex.P1.R)
mean(abs.mex.P1.R)
## [1] 0.458723
abs.mex.LLSC <- abs(res.mex.LLSC)
mean(abs.mex.LLSC)
## [1] 0.4434954
abs.mex.SBLL <- abs(res.mex.SBLL)
mean(abs.mex.SBLL)
## [1] 0.2713231
abs.mex.BD <- abs(res.mex.BD)
mean(abs.mex.BD)
## [1] 0.01757864
abs.mex.CPD <- abs(res.mex.CPD)
mean(abs.mex.CPD)
## [1] 0.02039358
abs.mex.CPL <- abs(res.mex.CPL)
mean(abs.mex.CPL)
## [1] 0.016549
abs.mex.PreDL <- abs(res.mex.PreDL)
mean(abs.mex.PreDL)
## [1] 0.03816268
abs.mex.DbL <- abs(res.mex.DbL)
mean(abs.mex.DbL)
## [1] 0.01463372
abs.mex.HL <- abs(res.mex.HL)
mean(abs.mex.HL)
## [1] 0.02708829
abs.mex.HD <- abs(res.mex.HD)
mean(abs.mex.HD)
## [1] 0.02589485
abs.mex.HW <- abs(res.mex.HW)
mean(abs.mex.HW)
## [1] 0.01729913
abs.mex.SnL <- abs(res.mex.SnL)
mean(abs.mex.SnL)
## [1] 0.03130948
abs.mex.OL <- abs(res.mex.OL)
mean(abs.mex.OL)
## [1] 0.03579061
#let's get this into the raw1 data set so that we can plot this more easily

abs.res.D <- c(abs.lat.D, abs.form.D, abs.mex.D)
abs.res.P1 <- c(abs.lat.P1, abs.form.P1, abs.mex.P1)
abs.res.P1.R <- c(abs.lat.P1.R, abs.form.P1.R, abs.mex.P1.R)
abs.res.LLSC<- c(abs.lat.LLSC, abs.form.LLSC, abs.mex.LLSC)
abs.res.SBLL<- c(abs.lat.SBLL, abs.form.SBLL, abs.mex.SBLL)
abs.res.BD<- c(abs.lat.BD, abs.form.BD, abs.mex.BD)
abs.res.CPD<- c(abs.lat.CPD, abs.form.CPD, abs.mex.CPD)
abs.res.CPL<- c(abs.lat.CPL, abs.form.CPL, abs.mex.CPL)
abs.res.PreDL <- c(abs.lat.PreDL, abs.form.PreDL, abs.mex.PreDL)
abs.res.DbL <- c(abs.lat.DbL, abs.form.DbL, abs.mex.DbL)
abs.res.HL<- c(abs.lat.HL, abs.form.HL, abs.mex.HL)
abs.res.HD<- c(abs.lat.HD, abs.form.HD, abs.mex.HD)
abs.res.HW <- c(abs.lat.HW, abs.form.HW, abs.mex.HW)
abs.res.SnL <- c(abs.lat.SnL, abs.form.SnL, abs.mex.SnL)
abs.res.OL <- c(abs.lat.OL, abs.form.OL, abs.mex.OL)


raw2 <- cbind(raw1, abs.res.D, abs.res.P1, abs.res.P1.R, abs.res.LLSC, abs.res.SBLL, abs.res.BD, abs.res.CPD, abs.res.CPL, abs.res.PreDL, abs.res.DbL, abs.res.HL, abs.res.HD, abs.res.HW, abs.res.SnL, abs.res.OL)

Residual Comparisons

  • STEP FOUR: plot the mean of the |residuals| for both species, for all traits influenced by body size
library(ggbeeswarm)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
ggplot(raw2, aes(SPP, abs.res.D)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

scatter_violin <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(
    fun.data = "mean_sdl",  fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
    )

print(scatter_violin)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `fun.data()`:
## ! The package "Hmisc" is required.

scatter_violin1 <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)

print(scatter_violin1)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.P1)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.P1.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.LLSC)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.SBLL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.BD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.CPD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.CPL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.PreDL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.DbL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.HL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.HD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.HW)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.SnL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

ggplot(raw2, aes(SPP, abs.res.OL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)
## Warning: Computation failed in `stat_summary()`
## Caused by error in `mean_cl_normal()`:
## ! The package "Hmisc" is required.

Comparing variation (t- and F-tests)

(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).

F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.

Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().

-   will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.

F tests to compare species

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

## 
##  F test to compare two variances
## 
## data:  lat$P2.L and form$P2.L
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
## 
##  F test to compare two variances
## 
## data:  lat$P2.R and form$P2.R
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
## 
##  F test to compare two variances
## 
## data:  lat$A and form$A
## F = 1.1322, num df = 133, denom df = 165, p-value = 0.4474
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8210231 1.5703926
## sample estimates:
## ratio of variances 
##           1.132245
## 
##  F test to compare two variances
## 
## data:  lat$SALL and form$SALL
## F = 1.625, num df = 133, denom df = 165, p-value = 0.003095
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.178316 2.253797
## sample estimates:
## ratio of variances 
##           1.624976
## 
##  F test to compare two variances
## 
## data:  lat$SBDF and form$SBDF
## F = 0.76031, num df = 133, denom df = 165, p-value = 0.1003
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5513254 1.0545347
## sample estimates:
## ratio of variances 
##          0.7603142
## 
##  F test to compare two variances
## 
## data:  lat$FLA and form$FLA
## F = 0.73321, num df = 133, denom df = 165, p-value = 0.06289
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5316692 1.0169377
## sample estimates:
## ratio of variances 
##           0.733207





T tests to compare species

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the log transformed data.
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.D and abs.form.D
## t = -0.61099, df = 263.7, p-value = 0.5417
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.12358398  0.06504966
## sample estimates:
## mean of x mean of y 
## 0.5375505 0.5668177
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.P1 and abs.form.P1
## t = 1.2856, df = 291.88, p-value = 0.1996
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03307996  0.15769016
## sample estimates:
## mean of x mean of y 
## 0.5466667 0.4843616
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.P1.R and abs.form.P1.R
## t = 3.4656, df = 297.09, p-value = 0.0006071
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.06729909 0.24417289
## sample estimates:
## mean of x mean of y 
## 0.5799393 0.4242033
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.LLSC and abs.form.LLSC
## t = -1.7461, df = 297.86, p-value = 0.08182
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.29241758  0.01746627
## sample estimates:
## mean of x mean of y 
## 0.7664044 0.9038801
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SBLL and abs.form.SBLL
## t = -2.2128, df = 289.71, p-value = 0.02769
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.17721884 -0.01036833
## sample estimates:
## mean of x mean of y 
## 0.2461336 0.3399272
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.BD and abs.form.BD
## t = -2.0222, df = 198.78, p-value = 0.04449
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0247278114 -0.0003112757
## sample estimates:
##  mean of x  mean of y 
## 0.02728166 0.03980120
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPD and abs.form.CPD
## t = 0.28712, df = 281.58, p-value = 0.7742
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003813911  0.005116555
## sample estimates:
##  mean of x  mean of y 
## 0.02321306 0.02256173
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPL and abs.form.CPL
## t = 0.087166, df = 296.73, p-value = 0.9306
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004714904  0.005151923
## sample estimates:
##  mean of x  mean of y 
## 0.02482066 0.02460215
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.PreDL and abs.form.PreDL
## t = 0.60271, df = 195.57, p-value = 0.5474
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007544556  0.014185418
## sample estimates:
##  mean of x  mean of y 
## 0.04520010 0.04187967
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.DbL and abs.form.DbL
## t = 1.0496, df = 296.33, p-value = 0.2947
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.002955384  0.009710919
## sample estimates:
##  mean of x  mean of y 
## 0.03444377 0.03106601
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HL and abs.form.HL
## t = -0.11233, df = 287.94, p-value = 0.9106
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005542244  0.004943775
## sample estimates:
##  mean of x  mean of y 
## 0.02817285 0.02847209
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HD and abs.form.HD
## t = 0.33624, df = 245.38, p-value = 0.737
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004542843  0.006413090
## sample estimates:
##  mean of x  mean of y 
## 0.02584031 0.02490519
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HW and abs.form.HW
## t = -0.57582, df = 294.93, p-value = 0.5652
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005428312  0.002970837
## sample estimates:
##  mean of x  mean of y 
## 0.02131838 0.02254712
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SnL and abs.form.SnL
## t = 0.84322, df = 280.1, p-value = 0.3998
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001792088  0.004477944
## sample estimates:
##  mean of x  mean of y 
## 0.01696166 0.01561874
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.OL and abs.form.OL
## t = -1.2257, df = 296.87, p-value = 0.2213
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.010969080  0.002549471
## sample estimates:
##  mean of x  mean of y 
## 0.03227227 0.03648207

Summary of variance results

average.table <- matrix(c(ttest.abs.D$p.value, mean(abs.lat.D, na.rm = TRUE), mean(abs.form.D, na.rm = TRUE), ttest.abs.P1$p.value, mean(abs.lat.P1, na.rm = TRUE), mean(abs.form.P1, na.rm = TRUE), var.rig.pel$p.value, mean(lat$P2.R, na.rm = TRUE), mean(form$P2.R, na.rm = TRUE), var.left.pel$p.value, mean(lat$P2.L, na.rm = TRUE), mean(form$P2.L, na.rm = TRUE), var.anal$p.value, mean(lat$A, na.rm = TRUE), mean(form$A, na.rm = TRUE), ttest.abs.P1.R$p.value, mean(abs.lat.P1.R, na.rm = TRUE), mean(abs.form.P1.R, na.rm = TRUE), ttest.abs.LLSC$p.value, mean(abs.lat.LLSC, na.rm = TRUE), mean(abs.form.LLSC, na.rm = TRUE), var.sca.ab.ll$p.value, mean(lat$SALL, na.rm = TRUE), mean(form$SALL, na.rm = TRUE), ttest.abs.SBLL$p.value, mean(abs.lat.SBLL, na.rm = TRUE), mean(abs.form.SBLL, na.rm = TRUE), var.sca.df$p.value, mean(lat$SBDF, na.rm = TRUE), mean(form$SBDF, na.rm = TRUE), ttest.abs.BD$p.value, mean(abs.lat.BD, na.rm = TRUE), mean(abs.form.BD, na.rm = TRUE), ttest.abs.CPD$p.value, mean(abs.lat.CPD, na.rm = TRUE), mean(abs.form.CPD, na.rm = TRUE), ttest.abs.CPL$p.value, mean(abs.lat.CPL, na.rm = TRUE), mean(abs.form.CPL, na.rm = TRUE), ttest.abs.PreDL$p.value, mean(abs.lat.PreDL, na.rm = TRUE), mean(abs.form.PreDL, na.rm = TRUE), ttest.abs.DbL$p.value, mean(abs.lat.DbL, na.rm = TRUE), mean(abs.form.DbL, na.rm = TRUE), ttest.abs.HL$p.value, mean(abs.lat.HL, na.rm = TRUE), mean(abs.form.HL, na.rm = TRUE), ttest.abs.HD$p.value, mean(abs.lat.HD, na.rm = TRUE), mean(abs.form.HD, na.rm = TRUE), ttest.abs.HW$p.value, mean(abs.lat.HW, na.rm = TRUE), mean(abs.form.HW, na.rm = TRUE), ttest.abs.SnL$p.value, mean(abs.lat.SnL, na.rm = TRUE), mean(abs.form.SnL, na.rm = TRUE), ttest.abs.OL$p.value, mean(abs.lat.OL, na.rm = TRUE), mean(abs.form.OL, na.rm = TRUE), var.flu.asy$p.value, mean(lat$FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)

colnames(average.table)<- c('p-value for variance', 'lat mean', 'form mean')

rownames(average.table) <- c('dorsal rays', 'L pect rays', 'R pelvic rays*', 'L pelvic rays*', 'Anal rays', 'R pect rays*', 'lat line scales', 'scales above LL', 'scales below LL', 'scales before dor*', 'body dep', 'peduncle dep', 'peduncle leng', 'pre-dorsal', 'dorsal base', 'head lengtt', 'head depth', 'head width', 'snout leng', 'orbital leng', 'fluct. asymmetries')

average.table
##                    p-value for variance   lat mean  form mean
## dorsal rays                0.5417294696 0.53755052 0.56681768
## L pect rays                0.1996120474 0.54666674 0.48436164
## R pelvic rays*                      NaN 6.00000000 6.00000000
## L pelvic rays*                      NaN 6.00000000 6.00000000
## Anal rays                  0.4474165527 9.66417910 9.69879518
## R pect rays*               0.0006070862 0.57993929 0.42420330
## lat line scales            0.0818216807 0.76640440 0.90388006
## scales above LL            0.0030951141 3.17910448 3.23493976
## scales below LL            0.0276911181 0.24613364 0.33992722
## scales before dor*         0.1002870398 8.55970149 9.72289157
## body dep                   0.0444906647 0.02728166 0.03980120
## peduncle dep               0.7742280655 0.02321306 0.02256173
## peduncle leng              0.9305986176 0.02482066 0.02460215
## pre-dorsal                 0.5473990768 0.04520010 0.04187967
## dorsal base                0.2947444609 0.03444377 0.03106601
## head lengtt                0.9106373518 0.02817285 0.02847209
## head depth                 0.7369798658 0.02584031 0.02490519
## head width                 0.5651750029 0.02131838 0.02254712
## snout leng                 0.3998242013 0.01696166 0.01561874
## orbital leng               0.2212832721 0.03227227 0.03648207
## fluct. asymmetries         0.0628880109 1.32835821 1.43373494

Visualization of significant results

plot_multi_histogram <- function(df, feature, label_column) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2)  +
    labs(x=feature, y = "Density")
    plt + guides(fill=guide_legend(title=label_column))
}

plot_multi_histogram(raw2, "abs.res.P1.R", "SPP")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot_multi_histogram(raw2, "abs.res.SBLL", "SPP")

plot_multi_histogram(raw2, "abs.res.DbL", "SPP")

plot_multi_histogram(raw2, "abs.res.HL", "SPP")

plot_multi_histogram(raw2, "FLA", "SPP")

plot_multi_histogram(raw2, "A", "SPP")

ANOVA

Will run an ANOVA on the residuals with location and species as fixed effects. This will show me if morphology depends on the species, the location, and if the location and species interact to determine morphology. (Not sure what to do with the rest of the data…)

I will first run this using the zones as the location factor. Zones (1-4) represent the latitude range with equivalent sample sizes in each, since the collections were not equally representative of all latitudes, and I wanted to avoid a sampling bias when randomly selecting samples. Zone 1 corresponds to the southern most latitude range, and zone 4 corresponds to the northern most latitude range.

I will then run the same analysis using basin as the location factor. Since fish are physically isolated to the river basins they occupy, the genetic variation is also limited to that basin. Thus it is possible for fish within the same basin to be more similar due to genetic/physical constraints. (will also do with watershed just to see).

Lastly I will run ANOVAs with both zones and basins but with standardized residuals. This would allow me to compare overall variation across traits (at least those that are depended on body size) rather than just one trait at a time. Not 100% sure if this is useful (or correct to do), but thought it would be interesting.

Zones

library(ggplot2)

raw3 <- raw2[raw2$SPP !="p.mexicana", ]

A.D <- aov(abs.res.D ~ SPP*QUARTILE, data=raw3)
summary(A.D)
##               Df Sum Sq Mean Sq F value  Pr(>F)    
## SPP            1   0.06  0.0635   0.408 0.52344    
## QUARTILE       3   0.45  0.1510   0.970 0.40715    
## SPP:QUARTILE   3   3.22  1.0719   6.888 0.00017 ***
## Residuals    292  45.44  0.1556                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.D, fill=SPP)) +
  geom_boxplot()

A.P1 <- aov(abs.res.P1 ~ SPP*QUARTILE, data=raw3)
summary(A.P1)
##               Df Sum Sq Mean Sq F value Pr(>F)
## SPP            1   0.29  0.2878   1.554  0.214
## QUARTILE       3   1.10  0.3663   1.977  0.117
## SPP:QUARTILE   3   0.73  0.2427   1.310  0.271
## Residuals    292  54.10  0.1853
A.P1.R <- aov(abs.res.P1.R ~ SPP*QUARTILE, data=raw3)
summary(A.P1.R)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP            1   1.80  1.7983  11.423 0.000824 ***
## QUARTILE       3   0.13  0.0434   0.276 0.842918    
## SPP:QUARTILE   3   0.06  0.0189   0.120 0.948162    
## Residuals    292  45.97  0.1574                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.P1.R, fill=SPP)) +
  geom_boxplot()

A.LLSC <- aov(abs.res.LLSC ~ SPP*QUARTILE, data=raw3)
summary(A.LLSC)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   1.40  1.4013   2.940 0.0875 .
## QUARTILE       3   1.35  0.4506   0.945 0.4190  
## SPP:QUARTILE   3   3.42  1.1399   2.391 0.0688 .
## Residuals    292 139.19  0.4767                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A.SBLL <- aov(abs.res.SBLL ~ SPP*QUARTILE, data=raw3)
summary(A.SBLL)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   0.65  0.6523   4.859 0.0283 *
## QUARTILE       3   0.78  0.2591   1.930 0.1248  
## SPP:QUARTILE   3   0.12  0.0390   0.291 0.8320  
## Residuals    292  39.20  0.1342                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.SBLL, fill=SPP)) +
  geom_boxplot()

A.BD <- aov(abs.res.BD ~ SPP*QUARTILE, data=raw3)
summary(A.BD)
##               Df Sum Sq  Mean Sq F value Pr(>F)  
## SPP            1 0.0116 0.011622   3.522 0.0615 .
## QUARTILE       3 0.0172 0.005728   1.736 0.1597  
## SPP:QUARTILE   3 0.0343 0.011437   3.466 0.0167 *
## Residuals    292 0.9634 0.003299                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.BD, fill=SPP)) +
  geom_boxplot()

A.CPD <- aov(abs.res.CPD ~ SPP*QUARTILE, data=raw3)
summary(A.CPD)
##               Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP            1 0.00003 0.0000315   0.082  0.775
## QUARTILE       3 0.00031 0.0001019   0.266  0.850
## SPP:QUARTILE   3 0.00099 0.0003291   0.860  0.462
## Residuals    292 0.11177 0.0003828
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.CPD, fill=SPP)) +
  geom_boxplot()

A.CPL <- aov(abs.res.CPL ~ SPP*QUARTILE, data=raw3)
summary(A.CPL)
##               Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP            1 0.00000 0.0000035   0.008 0.9301  
## QUARTILE       3 0.00530 0.0017667   3.844 0.0101 *
## SPP:QUARTILE   3 0.00383 0.0012773   2.779 0.0414 *
## Residuals    292 0.13420 0.0004596                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.CPL, fill=SPP)) +
  geom_boxplot()

A.PreDL <- aov(abs.res.PreDL ~ SPP*QUARTILE, data=raw3)
summary(A.PreDL)
##               Df Sum Sq   Mean Sq F value Pr(>F)
## SPP            1 0.0008 0.0008175   0.403  0.526
## QUARTILE       3 0.0025 0.0008373   0.413  0.744
## SPP:QUARTILE   3 0.0029 0.0009565   0.472  0.702
## Residuals    292 0.5918 0.0020269
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.PreDL, fill=SPP)) +
  geom_boxplot()

A.DbL <- aov(abs.res.DbL ~ SPP*QUARTILE, data=raw3)
summary(A.DbL)
##               Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP            1 0.00085 0.0008460   1.089 0.2976  
## QUARTILE       3 0.00360 0.0012004   1.545 0.2029  
## SPP:QUARTILE   3 0.00529 0.0017636   2.270 0.0805 .
## Residuals    292 0.22684 0.0007768                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.DbL, fill=SPP)) +
  geom_boxplot()

A.HL <- aov(abs.res.HL ~ SPP*QUARTILE, data=raw3)
summary(A.HL)
##               Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP            1 0.00001 0.0000066   0.012  0.911
## QUARTILE       3 0.00005 0.0000154   0.029  0.993
## SPP:QUARTILE   3 0.00254 0.0008478   1.596  0.191
## Residuals    292 0.15516 0.0005314
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.HL, fill=SPP)) +
  geom_boxplot()

A.HD <- aov(abs.res.HD ~ SPP*QUARTILE, data=raw3)
summary(A.HD)
##               Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP            1 0.00006 0.0000648   0.121 0.7281  
## QUARTILE       3 0.00435 0.0014498   2.707 0.0455 *
## SPP:QUARTILE   3 0.00108 0.0003584   0.669 0.5715  
## Residuals    292 0.15637 0.0005355                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A.HW <- aov(abs.res.HW ~ SPP*QUARTILE, data=raw3)
summary(A.HW)
##               Df  Sum Sq   Mean Sq F value  Pr(>F)   
## SPP            1 0.00011 0.0001119   0.338 0.56123   
## QUARTILE       3 0.00422 0.0014080   4.256 0.00581 **
## SPP:QUARTILE   3 0.00223 0.0007437   2.248 0.08288 . 
## Residuals    292 0.09661 0.0003309                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.HW, fill=SPP)) +
  geom_boxplot()

A.SnL <- aov(abs.res.SnL ~ SPP*QUARTILE, data=raw3)
summary(A.SnL)
##               Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP            1 0.00013 0.0001337   0.731 0.3932  
## QUARTILE       3 0.00147 0.0004912   2.686 0.0468 *
## SPP:QUARTILE   3 0.00072 0.0002388   1.306 0.2728  
## Residuals    292 0.05340 0.0001829                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A.OL <- aov(abs.res.OL ~ SPP*QUARTILE, data=raw3)
summary(A.OL)
##               Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP            1 0.00131 0.0013141   1.475 0.2256  
## QUARTILE       3 0.00343 0.0011444   1.284 0.2800  
## SPP:QUARTILE   3 0.00563 0.0018780   2.107 0.0994 .
## Residuals    292 0.26021 0.0008911                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.OL, fill=SPP)) +
  geom_boxplot()

Basins

A1.D <- aov(abs.res.D ~ SPP*BASIN, data=raw3)
summary(A1.D)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP           1   0.06  0.0635   0.402 0.52666   
## BASIN         6   3.12  0.5205   3.293 0.00377 **
## SPP:BASIN     3   0.31  0.1028   0.650 0.58344   
## Residuals   289  45.68  0.1581                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.D, fill=SPP)) +
  geom_boxplot()

A1.P1 <- aov(abs.res.P1 ~ SPP*BASIN, data=raw3)
summary(A1.P1)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.29  0.2878   1.528  0.217
## BASIN         6   0.97  0.1613   0.856  0.528
## SPP:BASIN     3   0.53  0.1751   0.930  0.427
## Residuals   289  54.43  0.1883
A1.P1.R <- aov(abs.res.P1.R ~ SPP*BASIN, data=raw3)
summary(A1.P1.R)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP           1   1.80  1.7983  11.421 0.000826 ***
## BASIN         6   0.51  0.0845   0.537 0.780341    
## SPP:BASIN     3   0.15  0.0485   0.308 0.819718    
## Residuals   289  45.50  0.1575                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.P1.R, fill=SPP)) +
  geom_boxplot()

A1.LLSC <- aov(abs.res.LLSC ~ SPP*BASIN, data=raw3)
summary(A1.LLSC)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   1.40  1.4013   2.999 0.0844 .
## BASIN         6   6.83  1.1376   2.435 0.0260 *
## SPP:BASIN     3   2.11  0.7026   1.504 0.2137  
## Residuals   289 135.02  0.4672                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.LLSC, fill=SPP)) +
  geom_boxplot()

A1.SBLL <- aov(abs.res.SBLL ~ SPP*BASIN, data=raw3)
summary(A1.SBLL)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   0.65  0.6523   4.895 0.0277 *
## BASIN         6   1.07  0.1785   1.340 0.2393  
## SPP:BASIN     3   0.51  0.1716   1.288 0.2788  
## Residuals   289  38.51  0.1332                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.SBLL, fill=SPP)) +
  geom_boxplot()

A1.BD <- aov(abs.res.BD ~ SPP*BASIN, data=raw3)
summary(A1.BD)
##              Df Sum Sq  Mean Sq F value Pr(>F)  
## SPP           1 0.0116 0.011622   3.397 0.0663 .
## BASIN         6 0.0122 0.002026   0.592 0.7366  
## SPP:BASIN     3 0.0140 0.004683   1.369 0.2525  
## Residuals   289 0.9887 0.003421                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.BD, fill=SPP)) +
  geom_boxplot()

A1.CPD <- aov(abs.res.CPD ~ SPP*BASIN, data=raw3)
summary(A1.CPD)
##              Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP           1 0.00003 0.0000315   0.084 0.7716  
## BASIN         6 0.00292 0.0004863   1.305 0.2545  
## SPP:BASIN     3 0.00249 0.0008287   2.225 0.0855 .
## Residuals   289 0.10766 0.0003725                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.CPD, fill=SPP)) +
  geom_boxplot()

A1.CPL <- aov(abs.res.CPL ~ SPP*BASIN, data=raw3)
summary(A1.CPL)
##              Df  Sum Sq   Mean Sq F value  Pr(>F)    
## SPP           1 0.00000 0.0000035   0.008 0.92971    
## BASIN         6 0.01100 0.0018328   4.035 0.00067 ***
## SPP:BASIN     3 0.00106 0.0003547   0.781 0.50546    
## Residuals   289 0.13127 0.0004542                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A1.PreDL <- aov(abs.res.PreDL ~ SPP*BASIN, data=raw3)
summary(A1.PreDL)
##              Df Sum Sq   Mean Sq F value Pr(>F)
## SPP           1 0.0008 0.0008175   0.406  0.524
## BASIN         6 0.0101 0.0016857   0.838  0.542
## SPP:BASIN     3 0.0055 0.0018434   0.916  0.433
## Residuals   289 0.5816 0.0020124
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.PreDL, fill=SPP)) +
  geom_boxplot()

A1.DbL <- aov(abs.res.DbL ~ SPP*BASIN, data=raw3)
summary(A1.DbL)
##              Df  Sum Sq   Mean Sq F value  Pr(>F)   
## SPP           1 0.00085 0.0008460   1.139 0.28673   
## BASIN         6 0.01425 0.0023751   3.198 0.00469 **
## SPP:BASIN     3 0.00685 0.0022831   3.074 0.02807 * 
## Residuals   289 0.21463 0.0007427                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.DbL, fill=SPP)) +
  geom_boxplot()

A1.HL <- aov(abs.res.HL ~ SPP*BASIN, data=raw3)
summary(A1.HL)
##              Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP           1 0.00001 0.0000066   0.013  0.910
## BASIN         6 0.00503 0.0008378   1.603  0.146
## SPP:BASIN     3 0.00169 0.0005638   1.079  0.358
## Residuals   289 0.15103 0.0005226
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HL, fill=SPP)) +
  geom_boxplot()

A1.HD <- aov(abs.res.HD ~ SPP*BASIN, data=raw3)
summary(A1.HD)
##              Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP           1 0.00006 0.0000648   0.121  0.728
## BASIN         6 0.00520 0.0008659   1.614  0.143
## SPP:BASIN     3 0.00151 0.0005047   0.941  0.421
## Residuals   289 0.15508 0.0005366
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HD, fill=SPP)) +
  geom_boxplot()

A1.HW <- aov(abs.res.HW ~ SPP*BASIN, data=raw3)
summary(A1.HW)
##              Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP           1 0.00011 0.0001119   0.329 0.5667  
## BASIN         6 0.00242 0.0004031   1.185 0.3143  
## SPP:BASIN     3 0.00233 0.0007767   2.283 0.0792 .
## Residuals   289 0.09832 0.0003402                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HW, fill=SPP)) +
  geom_boxplot()

A1.SnL <- aov(abs.res.SnL ~ SPP*BASIN, data=raw3)
summary(A1.SnL)
##              Df  Sum Sq   Mean Sq F value   Pr(>F)    
## SPP           1 0.00013 0.0001337   0.794   0.3737    
## BASIN         6 0.00542 0.0009036   5.365 2.86e-05 ***
## SPP:BASIN     3 0.00149 0.0004977   2.955   0.0329 *  
## Residuals   289 0.04868 0.0001684                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A1.OL <- aov(abs.res.OL ~ SPP*BASIN, data=raw3)
summary(A1.OL)
##              Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP           1 0.00131 0.0013141   1.483  0.224
## BASIN         6 0.00941 0.0015688   1.770  0.105
## SPP:BASIN     3 0.00378 0.0012592   1.421  0.237
## Residuals   289 0.25609 0.0008861
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.OL, fill=SPP)) +
  geom_boxplot()

Watersheds

A2.D <- aov(abs.res.D ~ SPP*WATERSHED, data=raw3)
summary(A2.D)
##                Df Sum Sq Mean Sq F value Pr(>F)  
## SPP             1   0.06  0.0635   0.415 0.5198  
## WATERSHED      13   4.33  0.3331   2.178 0.0106 *
## SPP:WATERSHED   5   1.96  0.3922   2.564 0.0274 *
## Residuals     280  42.82  0.1529                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.D, fill=SPP)) +
  geom_boxplot()

A2.P1 <- aov(abs.res.P1 ~ SPP*WATERSHED, data=raw3)
summary(A2.P1)
##                Df Sum Sq Mean Sq F value Pr(>F)  
## SPP             1   0.29  0.2878   1.568 0.2116  
## WATERSHED      13   3.73  0.2868   1.562 0.0957 .
## SPP:WATERSHED   5   0.78  0.1569   0.855 0.5120  
## Residuals     280  51.41  0.1836                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.P1.R <- aov(abs.res.P1.R ~ SPP*WATERSHED, data=raw3)
summary(A2.P1.R)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1   1.80  1.7983  11.145 0.000957 ***
## WATERSHED      13   0.82  0.0629   0.390 0.972416    
## SPP:WATERSHED   5   0.16  0.0322   0.199 0.962521    
## Residuals     280  45.18  0.1614                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.P1.R, fill=SPP)) +
  geom_boxplot()

A2.LLSC <- aov(abs.res.LLSC ~ SPP*WATERSHED, data=raw3)
summary(A2.LLSC)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1   1.40  1.4013   3.299   0.0704 .  
## WATERSHED      13  20.28  1.5596   3.672 2.15e-05 ***
## SPP:WATERSHED   5   4.75  0.9502   2.237   0.0509 .  
## Residuals     280 118.93  0.4248                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.LLSC, fill=SPP)) +
  geom_boxplot()

A2.SBLL <- aov(abs.res.SBLL ~ SPP*WATERSHED, data=raw3)
summary(A2.SBLL)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1   0.65  0.6523   5.611   0.0185 *  
## WATERSHED      13   2.98  0.2289   1.969   0.0233 *  
## SPP:WATERSHED   5   4.57  0.9138   7.861 6.06e-07 ***
## Residuals     280  32.55  0.1162                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.SBLL, fill=SPP)) +
  geom_boxplot()

A2.BD <- aov(abs.res.BD ~ SPP*WATERSHED, data=raw3)
summary(A2.BD)
##                Df Sum Sq  Mean Sq F value Pr(>F)  
## SPP             1 0.0116 0.011622   3.448 0.0644 .
## WATERSHED      13 0.0391 0.003004   0.891 0.5628  
## SPP:WATERSHED   5 0.0321 0.006415   1.903 0.0938 .
## Residuals     280 0.9438 0.003371                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.BD, fill=SPP)) +
  geom_boxplot()

A2.CPD <- aov(abs.res.CPD ~ SPP*WATERSHED, data=raw3)
summary(A2.CPD)
##                Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP             1 0.00003 0.0000315   0.086 0.7696  
## WATERSHED      13 0.00567 0.0004363   1.192 0.2842  
## SPP:WATERSHED   5 0.00492 0.0009839   2.688 0.0216 *
## Residuals     280 0.10247 0.0003660                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.CPD, fill=SPP)) +
  geom_boxplot()

A2.CPL <- aov(abs.res.CPL ~ SPP*WATERSHED, data=raw3)
summary(A2.CPL)
##                Df  Sum Sq   Mean Sq F value  Pr(>F)   
## SPP             1 0.00000 0.0000035   0.008 0.92948   
## WATERSHED      13 0.01603 0.0012327   2.732 0.00117 **
## SPP:WATERSHED   5 0.00097 0.0001931   0.428 0.82897   
## Residuals     280 0.12634 0.0004512                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.PreDL <- aov(abs.res.PreDL ~ SPP*WATERSHED, data=raw3)
summary(A2.PreDL)
##                Df Sum Sq   Mean Sq F value Pr(>F)
## SPP             1 0.0008 0.0008175   0.397  0.529
## WATERSHED      13 0.0140 0.0010795   0.525  0.909
## SPP:WATERSHED   5 0.0072 0.0014488   0.704  0.621
## Residuals     280 0.5759 0.0020570
A2.DbL <- aov(abs.res.DbL ~ SPP*WATERSHED, data=raw3)
summary(A2.DbL)
##                Df  Sum Sq   Mean Sq F value  Pr(>F)   
## SPP             1 0.00085 0.0008460   1.150 0.28447   
## WATERSHED      13 0.02396 0.0018428   2.505 0.00294 **
## SPP:WATERSHED   5 0.00580 0.0011597   1.576 0.16674   
## Residuals     280 0.20597 0.0007356                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.DbL, fill=SPP)) +
  geom_boxplot()

A2.HL <- aov(abs.res.HL ~ SPP*WATERSHED, data=raw3)
summary(A2.HL)
##                Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP             1 0.00001 0.0000066   0.013 0.9099  
## WATERSHED      13 0.01131 0.0008699   1.681 0.0645 .
## SPP:WATERSHED   5 0.00151 0.0003019   0.583 0.7129  
## Residuals     280 0.14493 0.0005176                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HL, fill=SPP)) +
  geom_boxplot()

A2.HD <- aov(abs.res.HD ~ SPP*WATERSHED, data=raw3)
summary(A2.HD)
##                Df  Sum Sq   Mean Sq F value  Pr(>F)   
## SPP             1 0.00006 0.0000648   0.129 0.72016   
## WATERSHED      13 0.01500 0.0011539   2.289 0.00693 **
## SPP:WATERSHED   5 0.00562 0.0011239   2.229 0.05163 . 
## Residuals     280 0.14117 0.0005042                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HD, fill=SPP)) +
  geom_boxplot()

A2.HW <- aov(abs.res.HW ~ SPP*WATERSHED, data=raw3)
summary(A2.HW)
##                Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP             1 0.00011 0.0001119   0.346 0.5566  
## WATERSHED      13 0.00894 0.0006878   2.129 0.0128 *
## SPP:WATERSHED   5 0.00365 0.0007307   2.262 0.0486 *
## Residuals     280 0.09047 0.0003231                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HW, fill=SPP)) +
  geom_boxplot()

A2.SnL <- aov(abs.res.SnL ~ SPP*WATERSHED, data=raw3)
summary(A2.SnL)
##                Df  Sum Sq   Mean Sq F value   Pr(>F)    
## SPP             1 0.00013 0.0001337   0.788 0.375509    
## WATERSHED      13 0.00700 0.0005384   3.172 0.000185 ***
## SPP:WATERSHED   5 0.00107 0.0002138   1.260 0.281528    
## Residuals     280 0.04752 0.0001697                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.OL <- aov(abs.res.OL ~ SPP*WATERSHED, data=raw3)
summary(A2.OL)
##                Df  Sum Sq   Mean Sq F value  Pr(>F)   
## SPP             1 0.00131 0.0013141   1.541 0.21545   
## WATERSHED      13 0.01666 0.0012814   1.503 0.11539   
## SPP:WATERSHED   5 0.01391 0.0027830   3.264 0.00699 **
## Residuals     280 0.23870 0.0008525                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.OL, fill=SPP)) +
  geom_boxplot()

Standardized

The ANOVAs above focus on differences of particular traits as a factor of species and location. If we want to get an idea of variation in general as a factor of species and location, we can standardize the residuals (essentially unitless z-scores of residuals).

sd.res.D <- append(abs(sd.lat.D), abs(sd.form.D)) sd.res.P1 <- append(abs(sd.lat.P1), abs(sd.form.P1)) sd.res.P1.R <- append(abs(sd.lat.P1.R), abs(sd.form.P1.R)) sd.res.LLSC<- append(abs(sd.lat.LLSC), abs(sd.form.LLSC)) sd.res.SBLL<- append(abs(sd.lat.SBLL), abs(sd.form.SBLL)) sd.res.BD<- append(abs(sd.lat.BD), abs(sd.form.BD)) sd.res.CPD<- append(abs(sd.lat.CPD), abs(sd.form.CPD)) sd.res.CPL<- append(abs(sd.lat.CPL), abs(sd.form.CPL)) sd.res.PreDL <- append(abs(sd.lat.PreDL), abs(sd.form.PreDL)) sd.res.DbL <- append(abs(sd.lat.DbL), abs(sd.form.DbL)) sd.res.HL<- append(abs(sd.lat.HL), abs(sd.form.HL)) sd.res.HD<- append(abs(sd.lat.HD), abs(sd.form.HD)) sd.res.HW <- append(abs(sd.lat.HW), abs(sd.form.HW)) sd.res.SnL <- append(abs(sd.lat.SnL), abs(sd.form.SnL)) sd.res.OL <- append(abs(sd.lat.OL), abs(sd.form.OL))

raw4 <- cbind(raw3, sd.res.D, sd.res.P1, sd.res.P1.R, sd.res.LLSC, sd.res.SBLL, sd.res.BD, sd.res.CPD, sd.res.CPL, sd.res.PreDL, sd.res.DbL, sd.res.HL, sd.res.HD, sd.res.HW, sd.res.SnL, sd.res.OL)

raw5 <- cbind(raw4[1:14], stack(raw4[53:68]))

lat.raw5 <- raw5[raw5$SPP == “p.latipinna”,]

form.raw5 <- raw5[raw5$SPP == “p.formosa”,]

######ZONES#####

A3.lat <- aov(values~QUARTILE, data=lat.raw5) summary(A3.lat)

A3.form <- aov(values~QUARTILE, data=form.raw5) summary(A3.form)

#between species

A3 <- aov(values~QUARTILE*SPP, data=raw5) summary(A3)

ggplot(raw5, aes(x=factor(QUARTILE), y=values, fill=SPP)) + geom_boxplot()

######BASINS#####

A4.lat <- aov(values~BASIN, data=lat.raw5) summary(A4.lat)

A4.form <- aov(values~BASIN, data=form.raw5) summary(A4.form)

#between species

A4 <- aov(values~BASIN*SPP, data=raw5) summary(A4)

ggplot(raw5, aes(x=factor(BASIN), y=values, fill=SPP)) + geom_boxplot()

#####WATERSHEDS##### A5.lat <- aov(values~WATERSHED, data=lat.raw5) summary(A5.lat)

A5.form <- aov(values~WATERSHED, data=form.raw5) summary(A5.form)

#between species

A5 <- aov(values~WATERSHED*SPP, data=raw5) summary(A5)

ggplot(raw5, aes(x=factor(WATERSHED), y=values, fill=SPP)) + geom_boxplot()

PCA analysis

LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.

Variable chart:

{r, echo=FALSE}

PCA <- prcomp(raw1[, 10:31], center=TRUE, scale. = TRUE) #includes all 22 traits summary(PCA) loadings <- PCA$rotation loadings[, 1:5]

VM_PCA <- varimax(PCA$rotation) summary(VM_PCA)

{r, echo=FALSE}

library(AMR) library(ggplot2)

library(ggfortify)

evplot <- function(ev) { # Broken stick model (MacArthur 1957) n <- length(ev) bsm <- data.frame(j=seq(1:n), p=0) bsm\(p[1] <- 1/n for (i in 2:n) bsm\)p[i] <- bsm\(p[i-1] + (1/(n + 1 - i)) bsm\)p <- 100*bsm\(p/n # Plot eigenvalues and % of variation for each axis op <- par(mfrow=c(2,1)) barplot(ev, main="Eigenvalues", col="bisque", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n") barplot(t(cbind(100*ev/sum(ev), bsm\)p[n:1])), beside=TRUE, main=“% variation”, col=c(“bisque”,2), las=2) legend(“topright”, c(“% eigenvalue”, “Broken stick model”), pch=15, col=c(“bisque”,2), bty=“n”) par(op) }

ev <- PCA$sdev^2 evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot… not 100% confident I know what this means, but pretty sure PC1 is body size

plot6<- autoplot(PCA, data = raw1, colour=‘SPP’, loadings=TRUE, loadings.colour=‘navyblue’, loadings.label=TRUE, loadings.label.colour=‘navyblue’, loadings.label.size=5, loadings.label.vjust= 1, loadings.label.hjust= 1.2, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot6

plot6.1<- autoplot(PCA, data = raw1, colour=‘SPP’, loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot6.1

plot7<- autoplot(PCA, data = raw1, colour=‘QUARTILE’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot7

plot7A<- autoplot(PCA, data = raw1, colour=‘BASIN’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot7A

plot7B<- autoplot(PCA, data = raw1, colour=‘WATERSHED’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot7B

plot8<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘SPP’, loadings=TRUE, loadings.colour=‘navyblue’, loadings.label=TRUE, loadings.label.colour=‘navyblue’, loadings.label.size=5, loadings.label.vjust= 1, loadings.label.hjust= 1.2, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot8

plot8.1<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘SPP’, loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot8.1

plot9<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘QUARTILE’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot9

plot9A<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘BASIN’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot9A

plot9B<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘WATERSHED’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot9B

PCA FOR RESITUDALS

not sure this is useful, but will have it here just in case

PCA2 <- prcomp(raw2[, 33:48], center=TRUE, scale. = TRUE) #includes all 22 traits summary(PCA2) loadings1 <- PCA2$rotation loadings1[, 1:5]

plot10<- autoplot(PCA2, x=1, y=2, data = raw2, colour=‘SPP’, loadings=TRUE, loadings.colour=‘navyblue’, loadings.label=TRUE, loadings.label.colour=‘navyblue’, loadings.label.size=5, loadings.label.vjust= 1, loadings.label.hjust= 1.2, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology trait Residuals”) + theme_minimal() plot10

plot11<- autoplot(PCA2, x=1, y=2, data = raw2, colour=‘SPP’, loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot11

Variable chart: